"""
数据计算-reduceByKey
功能：对每个key的value进行聚合操作
"""
from pyspark import SparkConf, SparkContext
import os
# Point PySpark at the Python interpreter it should use.
# NOTE(review): this path is machine-specific; adjust it to the local install.
os.environ['PYSPARK_PYTHON'] = "D:/dev/python/python310/python.exe"

# Run Spark locally under the application name "reduceByKey".
conf = SparkConf().setMaster("local[*]").setAppName("reduceByKey")

# Use the context as a context manager so it is stopped on exit even when
# one of the steps below raises, instead of relying on a trailing sc.stop()
# that is only reached on the success path.
with SparkContext(conf=conf) as sc:
    # Build an RDD of (key, value) pairs.
    rdd = sc.parallelize([("a", 1), ("b", 2), ("a", 3), ("b", 4), ("a", 5)])

    # reduceByKey merges the values of each key with the given binary
    # function; here the values are summed, giving a -> 9 and b -> 6.
    rdd2 = rdd.reduceByKey(lambda x, y: x + y)

    # Bring the aggregated pairs back to the driver and print them.
    print(rdd2.collect())